# Exercise: apply ses() (forecast package) to the pigs series and check
# the 95% prediction interval against the hand-computed formula.
str(pigs)
## Time-Series [1:188] from 1980 to 1996: 76378 71947 33873 96428 105084 ...
head(pigs)
## Jan Feb Mar Apr May Jun
## 1980 76378 71947 33873 96428 105084 95741
ses_pigs <- ses(pigs, h = 4)
# see how SES model was fitted
ses_pigs$model
## Simple exponential smoothing
##
## Call:
## ses(y = pigs, h = 4)
##
## Smoothing parameters:
## alpha = 0.2971
##
## Initial states:
## l = 77260.0561
##
## sigma: 10308.58
##
## AIC AICc BIC
## 4462.955 4463.086 4472.665
# 95% prediction interval for the first forecast
ses_pigs$upper[1, "95%"]
## 95%
## 119020.8
ses_pigs$lower[1, "95%"]
## 95%
## 78611.97
# calculate 95% prediction interval using formula
s <- sd(ses_pigs$residuals)
ses_pigs$mean[1] + 1.96*s
## [1] 118952.8
ses_pigs$mean[1] - 1.96*s
## [1] 78679.97
# The results differ slightly from ses(): sd() divides by n - 1, while ses() reports its own sigma estimate (10308.58 above), presumably adjusted for the number of fitted parameters -- TODO confirm against the forecast package documentation.
# plot the data, fitted values and forecasts.
autoplot(ses_pigs) +
autolayer(ses_pigs$fitted)
# Simple exponential smoothing: forecast the next observation.
#
# Args:
#   y     - numeric vector (or ts) of observations.
#   alpha - smoothing parameter.
#   l0    - initial level.
#
# Prints the one-step-ahead forecast and returns it invisibly, so the
# value can also be used programmatically. (Callers that ignored the
# previous NULL return are unaffected.)
SES <- function(y, alpha, l0) {
  y_hat <- l0
  # Level update: l_t = alpha * y_t + (1 - alpha) * l_{t-1}.
  # seq_along() is safe for zero-length input, unlike 1:length(y),
  # which would iterate over c(1, 0).
  for (index in seq_along(y)) {
    y_hat <- alpha * y[index] + (1 - alpha) * y_hat
  }
  cat("Forecast of next observation by SES function: ",
      as.character(y_hat),
      sep = "\n")
  invisible(y_hat)
}
# Validate the hand-rolled SES() against ses() by feeding it the
# parameters ses() estimated (par[1] = alpha, par[2] = l0).
# compare ses and SES using pigs data
alpha <- ses_pigs$model$par[1]
l0 <- ses_pigs$model$par[2]
SES(pigs, alpha = alpha, l0 = l0)
## Forecast of next observation by SES function:
## 98816.4061115907
writeLines(paste(
"Forecast of next observation by ses function: ", as.character(ses_pigs$mean[1])
))
## Forecast of next observation by ses function: 98816.4061115907
# compare ses and SES using ausbeer data
ses_ausbeer <- ses(ausbeer, h = 1)
alpha <- ses_ausbeer$model$par[1]
l0 <- ses_ausbeer$model$par[2]
SES(ausbeer, alpha = alpha, l0 = l0)
## Forecast of next observation by SES function:
## 421.313649201742
writeLines(paste(
"Forecast of next observation by ses function: ", as.character(ses_ausbeer$mean[1])
))
## Forecast of next observation by ses function: 421.313649201742
# found that SES function worked just like ses function.
# Sum of squared one-step-ahead errors for simple exponential smoothing.
#
# Args:
#   pars - c(alpha, l0) as a single vector, because optim() optimizes
#          over the first argument and requires it to be a parameter
#          vector.
#   y    - numeric vector (or ts) of observations.
#
# Returns the SSE of the in-sample one-step forecast errors.
SES <- function(pars = c(alpha, l0), y){
  alpha <- pars[1]
  l0 <- pars[2]
  SSE <- 0
  y_hat <- l0  # current one-step-ahead forecast (the level)
  # seq_along() is safe for zero-length input (SSE stays 0), unlike
  # 1:length(y). The dead `error <- 0` initialization was dropped.
  for (index in seq_along(y)) {
    error <- y[index] - y_hat
    SSE <- SSE + error^2
    # Level update: l_t = alpha * y_t + (1 - alpha) * l_{t-1}.
    y_hat <- alpha * y[index] + (1 - alpha) * y_hat
  }
  SSE
}
# Minimize the SSE version of SES() with optim() and compare the
# recovered (alpha, l0) with the values estimated by ses().
# compare ses and SES using pigs data
# set initial values as alpha = 0.5 and l0 = first observation value of data.
opt_SES_pigs <- optim(par = c(0.5, pigs[1]), y = pigs, fn = SES)
writeLines(paste(
"Optimal parameters for the result of SES function: ",
"\n",
as.character(opt_SES_pigs$par[1]),
", ",
as.character(opt_SES_pigs$par[2]),
sep = ""
))
## Optimal parameters for the result of SES function:
## 0.299008094014243, 76379.2653476235
writeLines(paste(
"Parameters got from the result of ses function: ",
"\n",
as.character(ses_pigs$model$par[1]),
", ",
as.character(ses_pigs$model$par[2]),
sep = ""
))
## Parameters got from the result of ses function:
## 0.297148833372095, 77260.0561458528
# In this case, alphas were almost same, but l0s were different. I think that it happened because l0's scale is big.
# compare ses and SES using ausbeer data
# set initial values as alpha = 0.5 and l0 = first observation value of data.
opt_SES_ausbeer <- optim(par = c(0.5, ausbeer[1]), y = ausbeer, fn = SES)
writeLines(paste(
"Optimal parameters for the result of SES function: ",
"\n",
as.character(opt_SES_ausbeer$par[1]),
", ",
as.character(opt_SES_ausbeer$par[2]),
sep = ""
))
## Optimal parameters for the result of SES function:
## 0.148803623284766, 259.658459712619
writeLines(paste(
"Parameters got from the result of ses function: ",
"\n",
as.character(ses_ausbeer$model$par[1]),
", ",
as.character(ses_ausbeer$model$par[2]),
sep = ""
))
## Parameters got from the result of ses function:
## 0.148900381680056, 259.65918938777
# In this case, alphas were almost same regardless of the function used. And got the same result for l0s.
# Fit simple exponential smoothing by minimizing the SSE of one-step
# errors and produce the next-observation forecast.
#
# Args:
#   init_pars - c(init_alpha, init_l0): starting values for optim().
#   data      - numeric vector (or ts) of observations.
#
# Returns a list with:
#   Next_observation_forecast - one-step-ahead forecast at the optimum,
#   alpha                     - optimal smoothing parameter,
#   l0                        - optimal initial level.
SES <- function(init_pars, data){
  # Final level after running the SES recursion over `data`;
  # this is the one-step-ahead forecast.
  level_after <- function(alpha, l0) {
    y_hat <- l0
    for (index in seq_along(data)) {
      y_hat <- alpha * data[index] + (1 - alpha) * y_hat
    }
    y_hat
  }
  # SSE of in-sample one-step forecast errors for pars = c(alpha, l0).
  SSE <- function(pars, data){
    alpha <- pars[1]
    l0 <- pars[2]
    SSE <- 0
    y_hat <- l0
    for (index in seq_along(data)) {
      error <- data[index] - y_hat
      SSE <- SSE + error^2
      y_hat <- alpha * data[index] + (1 - alpha) * y_hat
    }
    SSE
  }
  # use optim function to get optimal values of alpha and l0.
  optim_pars <- optim(par = init_pars, data = data, fn = SSE)
  # BUG FIX: the original captured the forecast with `<<-` from inside
  # SSE(), i.e. from whichever parameter vector optim() happened to
  # evaluate LAST -- not necessarily the returned optimum. Recompute
  # the forecast at the optimal parameters instead.
  list(
    Next_observation_forecast = level_after(optim_pars$par[1],
                                            optim_pars$par[2]),
    alpha = optim_pars$par[1],
    l0 = optim_pars$par[2]
  )
}
# Compare the optim-based SES() output with ses() on pigs and ausbeer,
# then work through the books exercise (ses vs Holt, PIs by formula).
# compare the result using pigs data
SES(c(0.5, pigs[1]), pigs)
## $Next_observation_forecast
## [1] 98814.63
##
## $alpha
## [1] 0.2990081
##
## $l0
## [1] 76379.27
print("Next observation forecast by ses function")
## [1] "Next observation forecast by ses function"
ses_pigs$mean[1]
## [1] 98816.41
print("alpha calculated by ses function")
## [1] "alpha calculated by ses function"
ses_pigs$model$par[1]
## alpha
## 0.2971488
print("l0 calculated by ses function")
## [1] "l0 calculated by ses function"
ses_pigs$model$par[2]
## l
## 77260.06
# compare the result using ausbeer data
SES(c(0.5, ausbeer[1]), ausbeer)
## $Next_observation_forecast
## [1] 421.3166
##
## $alpha
## [1] 0.1488036
##
## $l0
## [1] 259.6585
print("Next observation forecast by ses function")
## [1] "Next observation forecast by ses function"
ses_ausbeer$mean[1]
## [1] 421.3136
print("alpha calculated by ses function")
## [1] "alpha calculated by ses function"
ses_ausbeer$model$par[1]
## alpha
## 0.1489004
print("l0 calculated by ses function")
## [1] "l0 calculated by ses function"
ses_ausbeer$model$par[2]
## l
## 259.6592
# SES function worked similar to ses function.
# a.
str(books)
## Time-Series [1:30, 1:2] from 1 to 30: 199 172 111 209 161 119 195 195 131 183 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:2] "Paperback" "Hardcover"
head(books)
## Time Series:
## Start = 1
## End = 6
## Frequency = 1
## Paperback Hardcover
## 1 199 139
## 2 172 128
## 3 111 172
## 4 209 139
## 5 161 191
## 6 119 168
autoplot(books)
# The sales of paperback and hardcover books generally increased as time went on with lots of fluctuations. But the fluctuations don't show particular frequency that they can be thought of as cycle.
# b.
ses_paperback <- ses(books[, "Paperback"], h = 4)
ses_hardcover <- ses(books[, "Hardcover"], h = 4)
autoplot(books[, "Paperback"], series = "Paperback") +
autolayer(ses_paperback, series = "Paperback") +
autolayer(books[, "Hardcover"], series = "Hardcover") +
autolayer(ses_hardcover, series = "Hardcover", PI = FALSE) +
ylab("Sales amount") +
ggtitle("Sales of paperback and hardcover books")
# can see the flat forecast by ses method.
# c.
sqrt(mean(ses_paperback$residuals^2))
## [1] 33.63769
sqrt(mean(ses_hardcover$residuals^2))
## [1] 31.93101
# RMSE values for the training data show that the variance of the residuals of hardcover sales was smaller than the one of paperback sales.
# a.
holt_paperback <- holt(books[, "Paperback"], h = 4)
holt_hardcover <- holt(books[, "Hardcover"], h = 4)
autoplot(books[, "Paperback"]) +
autolayer(holt_paperback)
autoplot(books[, "Hardcover"]) +
autolayer(holt_hardcover)
# can see the linear trend in the forecasts.
# b.
s_paperback <- sqrt(mean(holt_paperback$residuals^2))
s_hardcover <- sqrt(mean(holt_hardcover$residuals^2))
s_paperback
## [1] 31.13692
s_hardcover
## [1] 27.19358
# For both series, RMSE values became lower when Holt's method was used.
# If there is linearly approximable trend in data, it would be better to use Holt's linear method even if one more parameter is needed than SES. But if there isn't any particular trend in data, it would be better to use SES method to make the model simpler.
# c.
# I think that the forecasts of hardcover sales were better than the ones of paperback sales. Because RMSE value is lower for hardcover sales. And because the forecasts of paperback sales couldn't reflect the pattern in the data using Holt's method.
# d.
writeLines("95% PI of paperback sales calculated by holt function")
## 95% PI of paperback sales calculated by holt function
holt_paperback$upper[1, "95%"]
## 95%
## 275.0205
holt_paperback$lower[1, "95%"]
## 95%
## 143.913
writeLines("95% PI of paperback sales calculated by formula")
## 95% PI of paperback sales calculated by formula
holt_paperback$mean[1] + 1.96*s_paperback
## [1] 270.4951
holt_paperback$mean[1] - 1.96*s_paperback
## [1] 148.4384
writeLines("95% PI of hardcover sales calculated by holt function")
## 95% PI of hardcover sales calculated by holt function
holt_hardcover$upper[1, "95%"]
## 95%
## 307.4256
holt_hardcover$lower[1, "95%"]
## 95%
## 192.9222
writeLines("95% PI of hardcover sales calculated by formula")
## 95% PI of hardcover sales calculated by formula
holt_hardcover$mean[1] + 1.96*s_hardcover
## [1] 303.4733
holt_hardcover$mean[1] - 1.96*s_hardcover
## [1] 196.8745
# In this case, the prediction interval for the first forecast for each series was almost same regardless of calculating method. It is different from the ses case, in which the PI was different when it was calculated by ses function and formula respectively.
# [Hint: use h=100 when calling holt() so you can clearly see the differences between the various options when plotting the forecasts.]
# Which model gives the best RMSE?
# eggs series: compare holt() variants (plain, damped, Box-Cox,
# Box-Cox + damped) over a 100-step horizon and compare RMSEs.
str(eggs)
## Time-Series [1:94] from 1900 to 1993: 277 315 315 321 315 ...
head(eggs)
## Time Series:
## Start = 1900
## End = 1905
## Frequency = 1
## [1] 276.79 315.42 314.87 321.25 314.54 317.92
autoplot(eggs)
# can see downward trend of the price of dozen eggs in US.
# I expect that using holt function with damped = TRUE and Box-Cox options will yield best forecasts. Because I think that the price of eggs will decrease more slowly as the price is going to near 0. And there's a need to make the size of the seasonal variation smaller for bigger prices.
# First, just use holt function without using any options.
holt_eggs <- holt(eggs, h = 100)
autoplot(holt_eggs) +
autolayer(holt_eggs$fitted)
# Unrealistic because the predicted price is going to be below 0.
# Second, use holt function with damped option.
holt_damped_eggs <- holt(eggs, damped = TRUE, h = 100)
autoplot(holt_damped_eggs) +
autolayer(holt_damped_eggs$fitted)
# Now, the predicted price don't go below 0, but point forecasts didn't reflect the existing trend.
# Third, use holt function with Box-Cox transformation.
holt_BoxCox_eggs <- holt(eggs,
lambda = BoxCox.lambda(eggs),
h = 100)
autoplot(holt_BoxCox_eggs) +
autolayer(holt_BoxCox_eggs$fitted)
# Now, the point forecasts didn't go below 0 and reflected the existing trend.
# Fourth, use holt function with Box-Cox transformation and damped option.
holt_BoxCox_damped_eggs <- holt(
eggs,
damped = TRUE,
lambda = BoxCox.lambda(eggs),
h = 100)
autoplot(holt_BoxCox_damped_eggs) +
autolayer(holt_BoxCox_damped_eggs$fitted)
# The point forecasts didn't go below 0 and are still decreasing. But they didn't reflect the existing trend well. Lower ends of prediction intervals were below 0.
# show RMSE values for each model
writeLines("RMSE when using holt function")
## RMSE when using holt function
sqrt(mean(holt_eggs$residuals^2))
## [1] 26.58219
writeLines("RMSE when using holt function with damped option")
## RMSE when using holt function with damped option
sqrt(mean(holt_damped_eggs$residuals^2))
## [1] 26.54019
writeLines("RMSE when using holt function with Box-Cox transformation")
## RMSE when using holt function with Box-Cox transformation
sqrt(mean(holt_BoxCox_eggs$residuals^2))
## [1] 1.032217
writeLines("RMSE when using holt function with damped option and Box-Cox transformation")
## RMSE when using holt function with damped option and Box-Cox transformation
sqrt(mean(holt_BoxCox_damped_eggs$residuals^2))
## [1] 1.039187
# NOTE(review): the RMSEs of the two Box-Cox models (~1.03) are far smaller than the others (~26.5), which suggests those residuals are on the transformed scale and not directly comparable -- confirm before ranking models by these numbers.
# Box-Cox transformation captures trend and reflects it in the forecasts, therefore improving the accuracy of the model. Holt's method with the damped option just prevents the forecasts from going below 0, without much improving accuracy.
# The best model was the Box-Cox transformation with Holt's linear method. It gave plausible point forecasts and prediction intervals. For 100 years' prediction, Box-Cox transformation did enough damping effect. With damping option together, the point forecast couldn't follow the existing trend.
# Retail exercise: Holt-Winters multiplicative (with/without damping),
# tsCV comparison, residual checks, then STL + ETS on a train/test split.
# load the data
retail <- xlsx::read.xlsx("retail.xlsx",
sheetIndex = 1,
startRow = 2)
ts_retail <- ts(retail[, "A3349873A"],
frequency = 12,
start = c(1982, 4))
# a.
autoplot(ts_retail)
# the data show that the seasonality indices increased when the retail sales increased. Multiplicative seasonality can reflect the situation in the model, while additive seasonality can't.
# b.
ets_AAM_retail <- hw(ts_retail,
seasonal = "multiplicative")
ets_AAdM_retail <- hw(ts_retail,
seasonal = "multiplicative",
damped = TRUE)
autoplot(ets_AAM_retail)
autoplot(ets_AAdM_retail)
# The forecasts increased more slowly when damped option was used than it wasn't used.
# c.
error_ets_AAM_retail <- tsCV(
ts_retail,
hw, h = 1, seasonal = "multiplicative"
)
error_ets_AAdM_retail <- tsCV(
ts_retail,
hw, h = 1, seasonal = "multiplicative", damped = TRUE
)
sqrt(mean(error_ets_AAM_retail^2, na.rm = TRUE))
## [1] 14.72762
sqrt(mean(error_ets_AAdM_retail^2, na.rm = TRUE))
## [1] 14.94306
# When the RMSE values were compared, they were almost same. Therefore I prefer damped model because it will prohibit the limitless increase of sales forecast.
# d.
checkresiduals(ets_AAdM_retail)
##
## Ljung-Box test
##
## data: Residuals from Damped Holt-Winters' multiplicative method
## Q* = 42.932, df = 7, p-value = 3.437e-07
##
## Model df: 17. Total lags used: 24
# Unfortunately, the residuals from the best method don't look like white noise. Ljung-Box test result and ACF plot show that the residuals aren't white noise.
# e.
ts_retail_train <- window(ts_retail,
end = c(2010, 12))
ts_retail_test <- window(ts_retail,
start = 2011)
# try Holt-Winters' method with damped option.
ets_AAdM_retail_train <- hw(ts_retail_train,
h = 36,
seasonal = "multiplicative",
damped = TRUE)
autoplot(ets_AAdM_retail_train)
accuracy(ets_AAdM_retail_train, ts_retail_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.4556121 8.681456 6.24903 0.2040939 3.151257 0.3916228
## Test set 94.7346169 111.911266 94.73462 24.2839784 24.283978 5.9369594
## ACF1 Theil's U
## Training set -0.01331859 NA
## Test set 0.60960299 1.90013
# When I used Holt-Winters' method with damped option, I couldn't beat seasonal naive approach.
# try Holt-Winters' method.
ets_AAM_retail_train <- hw(ts_retail_train,
h = 36,
seasonal = "multiplicative")
autoplot(ets_AAM_retail_train)
accuracy(ets_AAM_retail_train, ts_retail_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.03021223 9.107356 6.553533 0.001995484 3.293399 0.4107058
## Test set 78.34068365 94.806617 78.340684 19.945024968 19.945025 4.9095618
## ACF1 Theil's U
## Training set 0.02752875 NA
## Test set 0.52802701 1.613903
# When I used Holt-Winters' method without damped option, I could get better accuracy than when I used the option.
# But it still couldn't beat the seasonal naive approach.
# In this case, damped Holt-Winters' method was worse than Holt-Winters' method because the actual sales amount in the forecast horizon was exponentially increasing, not damping.
# I think that this case reflects the fact that the assumption behind the chosen forecast method should be right to forecast more accurately.
#stl_ets_retail_train <- ts_retail_train %>%
# stl(s.window = 13, robust = TRUE) %>%
# forecast(method = "ets",
# h = 36,
# lambda = BoxCox.lambda(ts_retail_train))
# Fitted values and forecasts of above code yields far bigger values. It looked like the lambda in forecast function just do back-transform if input model doesn't use lambda option.
# I wonder if the forecast function assumes that the model entered is already transformed if lambda isn't designated in the model.
fc_stl_ets_retail_train <- ts_retail_train %>%
stlm(
#made stl model first
s.window = 13,
robust = TRUE,
#designate that the seasonally adjusted data should be forecasted by ETS method.
method = "ets",
lambda = BoxCox.lambda(ts_retail_train)
) %>%
#forecast using stl model
forecast(
h = 36,
lambda = BoxCox.lambda(ts_retail_train)
)
## Warning in InvBoxCox(fcast$mean, lambda, biasadj, fcast): biasadj information
## not found, defaulting to FALSE.
# I didn't need to use seasadj function because forecasts of STL objects are applying non-seasonal forecasting method to the seasonally adjusted data automatically.
autoplot(fc_stl_ets_retail_train)
accuracy(fc_stl_ets_retail_train, ts_retail_test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.6782982 8.583559 5.918078 -0.3254076 2.913104 0.3708823
## Test set 82.1015276 98.384220 82.101528 21.0189982 21.018998 5.1452516
## ACF1 Theil's U
## Training set 0.02704667 NA
## Test set 0.52161725 1.679783
# ETS forecasting after STL decomposition with Box-Cox transformation yielded better result than when ETS(A, Ad, M) was used. But the method was a little worse than ETS(A, A, M). It still couldn't beat seasonal naive method.
# try forecasting without doing transformation.
fc_stl_ets_retail_train_without_tr <-
ts_retail_train %>%
stlm(
s.window = 13,
robust = TRUE,
method = "ets"
) %>%
forecast(h = 36)
autoplot(fc_stl_ets_retail_train_without_tr)
accuracy(fc_stl_ets_retail_train_without_tr,
ts_retail_test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.5020795 10.00826 6.851597 -0.391432 3.489759 0.4293853
## Test set 74.2529959 91.04491 74.252996 18.837766 18.837766 4.6533890
## ACF1 Theil's U
## Training set 0.09741266 NA
## Test set 0.48917501 1.549271
# Without doing transformation, when I got accuracy using test set I got better result. But I couldn't expect it because when I also used transformation, the accuracy of training set was better. In fact, the actual values in forecast horizon increased exponentially.
# Without using transformation, the forecast could reflect the fact that the bigger values have bigger variation and it was useful at forecasting at the time.
# ETS forecasting after STL decomposition 'without' Box-Cox transformation yielded better result than when ETS(A, Ad, M) or ETS(A, A, M) was used. But the method also couldn't beat seasonal naive method.
# ukcars exercise: STL decomposition, stlf with ETS(A,Ad,N)/ETS(A,A,N),
# plain ets(), and comparison of training accuracies and residuals.
# a.
str(ukcars)
## Time-Series [1:113] from 1977 to 2005: 330 371 271 344 358 ...
head(ukcars)
## Qtr1 Qtr2 Qtr3 Qtr4
## 1977 330.371 371.051 270.670 343.880
## 1978 358.491 362.822
autoplot(ukcars)
# The data have trend and quarterly seasonality.
# b.
seasadj_ukcars <- ukcars %>% stl(s.window = 4, robust = TRUE) %>% seasadj()
autoplot(seasadj_ukcars)
# The variations in seasonally adjusted data are smaller.
# c.
stlf_ets_AAdN_ukcars <- ukcars %>% stlf(h = 8, etsmodel = "AAN", damped = TRUE)
autoplot(stlf_ets_AAdN_ukcars)
# d.
stlf_ets_AAN_ukcars <- ukcars %>% stlf(h = 8, etsmodel = "AAN", damped = FALSE)
autoplot(stlf_ets_AAN_ukcars)
# e.
ets_ukcars <- ets(ukcars)
summary(ets_ukcars)
## ETS(A,N,A)
##
## Call:
## ets(y = ukcars)
##
## Smoothing parameters:
## alpha = 0.6199
## gamma = 1e-04
##
## Initial states:
## l = 314.2568
## s = -1.7579 -44.9601 21.1956 25.5223
##
## sigma: 25.9302
##
## AIC AICc BIC
## 1277.752 1278.819 1296.844
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 1.313884 25.23244 20.17907 -0.1570979 6.629003 0.6576259
## ACF1
## Training set 0.02573334
# got ETS(A, N, A) model.
autoplot(forecast(ets_ukcars, h = 8))
# f.
# print a blank separator line before the accuracy comparisons.
writeLines("")
print("Accuracy of STL + ETS(A, Ad, N) model")
## [1] "Accuracy of STL + ETS(A, Ad, N) model"
accuracy(stlf_ets_AAdN_ukcars)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.551267 23.32113 18.48987 0.04121971 6.042764 0.602576 0.02262668
print("Accuracy of STL + ETS(A, A, N) model")
## [1] "Accuracy of STL + ETS(A, A, N) model"
accuracy(stlf_ets_AAN_ukcars)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.3412727 23.295 18.1605 -0.5970778 5.98018 0.5918418 0.02103582
print("Accuracy of ETS(A, N, A) model")
## [1] "Accuracy of ETS(A, N, A) model"
accuracy(ets_ukcars)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.313884 25.23244 20.17907 -0.1570979 6.629003 0.6576259
## ACF1
## Training set 0.02573334
# STL + ETS(A, Ad, N) was the best model.
# g.
# I think that the forecasts from the STL + ETS(A, Ad, N) model were the most reasonable ones. I think so because the forecasts best reflected the not-increasing and smaller-variation trend after the fall of 2001.
# h.
checkresiduals(stlf_ets_AAdN_ukcars)
## Warning in checkresiduals(stlf_ets_AAdN_ukcars): The fitted degrees of freedom
## is based on the model used for the seasonally adjusted data.
##
## Ljung-Box test
##
## data: Residuals from STL + ETS(A,Ad,N)
## Q* = 24.138, df = 3, p-value = 2.337e-05
##
## Model df: 5. Total lags used: 8
# There are still some autocorrelations in the residuals. And they don't look like normally distributed.
# visitors exercise: train/test split (last 24 months held out), then
# compare HW multiplicative, ets(), additive ETS + Box-Cox, snaive, and
# STL + ETS on test-set accuracy.
# a.
str(visitors)
## Time-Series [1:240] from 1985 to 2005: 75.7 75.4 83.1 82.9 77.3 ...
head(visitors)
## May Jun Jul Aug Sep Oct
## 1985 75.7 75.4 83.1 82.9 77.3 105.7
autoplot(visitors)
ggseasonplot(visitors)
# Can see general increasing trend and monthly seasonality. And I can also find the dramatic decrease in May, 2003.
# b.
visitors_train <- subset(visitors,
end = length(visitors) - 24)
visitors_test <- subset(visitors,
start = length(visitors) - 23)
hw_mul_visitors_train <- hw(visitors_train,
h = 24,
seasonal = "multiplicative")
# c.
autoplot(hw_mul_visitors_train)
# Can see that the seasonality effect increased as the number of visitors increased. Additive seasonality can't reflect the situation to the model and to the forecast.
# d.
# d-1. an ETS model;
fc_ets_visitors_train <- forecast(ets(visitors_train), h = 24)
autoplot(fc_ets_visitors_train)
# d-2. an additive ETS model applied to a Box-Cox transformed series;
fc_ets_add_BoxCox_visitors_train <- forecast(
ets(visitors_train,
lambda = BoxCox.lambda(visitors_train),
additive.only = TRUE),
h = 24
)
autoplot(fc_ets_add_BoxCox_visitors_train)
# d-3. a seasonal naive method;
fc_snaive_visitors_train <- snaive(visitors_train, h = 24)
autoplot(fc_snaive_visitors_train)
# d-4. an STL decomposition applied to the Box-Cox transformed data followed by an ETS model applied to the seasonally adjusted (transformed) data.
fc_BoxCox_stl_ets_visitors_train <- visitors_train %>%
stlm(
lambda = BoxCox.lambda(visitors_train),
s.window = 13,
robust = TRUE,
method = "ets"
) %>%
forecast(h = 24)
autoplot(fc_BoxCox_stl_ets_visitors_train)
# e.
accuracy(hw_mul_visitors_train, visitors_test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.9749466 14.06539 10.35763 -0.5792169 4.223204 0.3970304
## Test set 72.9189889 83.23541 75.89673 15.9157249 17.041927 2.9092868
## ACF1 Theil's U
## Training set 0.1356528 NA
## Test set 0.6901318 1.151065
accuracy(fc_ets_visitors_train, visitors_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.7640074 14.53480 10.57657 0.1048224 3.994788 0.405423
## Test set 72.1992664 80.23124 74.55285 15.9202832 16.822384 2.857773
## ACF1 Theil's U
## Training set -0.05311217 NA
## Test set 0.58716982 1.127269
accuracy(fc_ets_add_BoxCox_visitors_train, visitors_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.001363 14.97096 10.82396 0.1609336 3.974215 0.4149057
## Test set 69.458843 78.61032 72.41589 15.1662261 16.273089 2.7758586
## ACF1 Theil's U
## Training set -0.02535299 NA
## Test set 0.67684148 1.086953
accuracy(fc_snaive_visitors_train, visitors_test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 17.29363 31.15613 26.08775 7.192445 10.285961 1.000000 0.6327669
## Test set 32.87083 50.30097 42.24583 6.640781 9.962647 1.619375 0.5725430
## Theil's U
## Training set NA
## Test set 0.6594016
accuracy(fc_BoxCox_stl_ets_visitors_train, visitors_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.5803348 13.36431 9.551391 0.08767744 3.51950 0.3661256
## Test set 76.3637263 84.24658 78.028992 16.87750474 17.51578 2.9910209
## ACF1 Theil's U
## Training set -0.05924203 NA
## Test set 0.64749552 1.178154
# The result when the models are rated according to accuracy using test set:
# snaive > additive ETS with BoxCox transformation - ETS(A, A, A) > STL + ETS(M, A, N) with BoxCox transformation > ETS (M, Ad, M) > Holt-Winters' multiplicative method
# f.
# first, make functions to make model to yield forecast class object
# Fit an additive-only ETS model on the Box-Cox transformed series and
# return an h-step forecast object (signature matches what tsCV expects).
fets_add_BoxCox <- function(y, h) {
  fit <- ets(y,
             lambda = BoxCox.lambda(y),
             additive.only = TRUE)
  forecast(fit, h = h)
}
# Robust STL decomposition (Box-Cox transformed) with an ETS model on
# the seasonally adjusted series; returns an h-step forecast object
# (signature matches what tsCV expects).
fstlm <- function(y, h) {
  fit <- stlm(y,
              lambda = BoxCox.lambda(y),
              s.window = frequency(y) + 1,
              robust = TRUE,
              method = "ets")
  forecast(fit, h = h)
}
# Automatically selected ETS model, forecast h steps ahead
# (signature matches what tsCV expects).
fets <- function(y, h) {
  model <- ets(y)
  forecast(model, h = h)
}
# I'll compare the models using RMSE
sqrt(mean(tsCV(visitors, snaive, h = 1)^2, na.rm = TRUE))
## [1] 32.78675
sqrt(mean(tsCV(visitors, fets_add_BoxCox, h = 1)^2,
na.rm = TRUE))
## [1] 18.86439
sqrt(mean(tsCV(visitors, fstlm, h = 1)^2,
na.rm = TRUE))
## [1] 17.49642
sqrt(mean(tsCV(visitors, fets, h = 1)^2, na.rm = TRUE))
## [1] 18.52985
sqrt(mean(tsCV(visitors, hw, h = 1,
seasonal = "multiplicative")^2,
na.rm = TRUE))
## [1] 19.62107
# tsCV errors show that the best model is the STL + ETS(M, A, N) model and the worst model is seasonal naive model. If I hadn't calculated accuracy using test set, I couldn't have known that the forecasts from seasonal naive method were the most accurate ones.
# NOTE(review): the next line re-defines fets identically to the definition above; it is redundant and could be removed.
fets <- function(y, h) { forecast(ets(y), h = h) }
# ausbeer: hold out the last 12 quarters and compare ets(), snaive()
# and stlf() (with Box-Cox) on test-set accuracy.
# ausbeer data case
str(ausbeer)
## Time-Series [1:218] from 1956 to 2010: 284 213 227 308 262 228 236 320 272 233 ...
head(ausbeer)
## Qtr1 Qtr2 Qtr3 Qtr4
## 1956 284 213 227 308
## 1957 262 228
# ausbeer are quarterly data
autoplot(ausbeer)
# it looked like it would be better to use BoxCox transformation option in stlf function.
ausbeer_train <- subset(
ausbeer, end = length(ausbeer) - 12
)
ausbeer_test <- subset(
ausbeer, start = length(ausbeer) - 11
)
# try each model and forecast.
ets_ausbeer_train <- forecast(
ets(ausbeer_train), h = 12
)
snaive_ausbeer_train <- snaive(ausbeer_train, h = 12)
stlf_ausbeer_train <- stlf(
ausbeer_train,
h = 12,
s.window = 5,
robust = TRUE,
lambda = BoxCox.lambda(ausbeer_train))
# choose best model using test set
accuracy(ets_ausbeer_train, ausbeer_test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.3466095 15.781973 11.979955 -0.064073435 2.864311 0.7567076
## Test set 0.1272571 9.620828 8.919347 0.009981598 2.132836 0.5633859
## ACF1 Theil's U
## Training set -0.1942276 NA
## Test set 0.3763918 0.1783972
accuracy(snaive_ausbeer_train, ausbeer_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 3.336634 19.67005 15.83168 0.9044009 3.771370 1.0000000
## Test set -2.916667 10.80509 9.75000 -0.6505927 2.338012 0.6158537
## ACF1 Theil's U
## Training set 0.0009690785 NA
## Test set 0.3254581810 0.1981463
accuracy(stlf_ausbeer_train, ausbeer_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.5704833 13.61609 9.816091 0.1279204 2.334194 0.6200283
## Test set -4.0433841 10.11436 8.429747 -0.9670308 2.051915 0.5324605
## ACF1 Theil's U
## Training set -0.1131945 NA
## Test set 0.2865992 0.1689574
# Without RMSE, all the other errors show that the best model is STL + ETS(M, Ad, N).
autoplot(stlf_ausbeer_train) +
autolayer(ausbeer_test)
# The forecasts are similar to real data.
# make a function to automatically fit models to given data and calculate accuracy using test set for each model.
# Fit several benchmark models to a series and score them on a held-out
# test set.
#
# Args:
#   y - the full series (ts).
#   h - number of years to hold out; the forecast horizon is
#       h * frequency(y) observations.
#
# Returns a list:
#   train      - training window of y,
#   test       - test window of y,
#   m          - frequency of y,
#   models     - forecast objects: $ets (ets + forecast), $snaive,
#                $stlf, $stlf_with_BoxCox (stlf with a Box-Cox lambda),
#   accuracies - accuracy() of each model against the test set:
#                $acc_ets, $acc_snaive, $acc_stlf, $acc_stlf_with_BoxCox.
#
# NOTE(review): the dot in the name makes this look like an S3 method
# `forecast()` for class "models"; kept as-is because callers use it.
forecast.models <- function(y, h){
  m <- frequency(y)
  horizon <- m * h
  # Split the series: everything but the last `horizon` points is train.
  y_train <- subset(y, end = length(y) - horizon)
  y_test <- subset(y, start = length(y) - horizon + 1)
  # Fit each candidate model and forecast over the test period.
  models <- list(
    ets = forecast(ets(y_train), h = horizon),
    snaive = snaive(y_train, h = horizon),
    stlf = stlf(y_train,
                h = horizon,
                s.window = m + 1,
                robust = TRUE),
    stlf_with_BoxCox = stlf(y_train,
                            h = horizon,
                            s.window = m + 1,
                            robust = TRUE,
                            lambda = BoxCox.lambda(y_train))
  )
  # Score every model on the held-out observations.
  accuracies <- list(
    acc_ets = accuracy(models$ets, y_test),
    acc_snaive = accuracy(models$snaive, y_test),
    acc_stlf = accuracy(models$stlf, y_test),
    acc_stlf_with_BoxCox = accuracy(models$stlf_with_BoxCox, y_test)
  )
  list(train = y_train,
       test = y_test,
       m = m,
       models = models,
       accuracies = accuracies)
}
# bricksq data case
fc_bricksq <- forecast.models(bricksq, 3)
fc_bricksq$accuracies
## $acc_ets
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.228708 21.96146 15.81586 0.2606171 3.912882 0.4299638 0.2038074
## Test set 37.682916 42.36696 37.68292 8.4157549 8.415755 1.0244329 0.6190546
## Theil's U
## Training set NA
## Test set 1.116608
##
## $acc_snaive
## ME RMSE MAE MPE MAPE MASE
## Training set 6.438849 50.25482 36.78417 1.430237 8.999949 1.0000000
## Test set 15.333333 37.15284 33.66667 3.231487 7.549326 0.9152487
## ACF1 Theil's U
## Training set 0.81169827 NA
## Test set -0.09314867 0.9538702
##
## $acc_stlf
## ME RMSE MAE MPE MAPE MASE
## Training set 1.318736 21.49082 14.41766 0.2855898 3.501287 0.3919528
## Test set 44.266931 50.20039 45.89301 10.0933739 10.487099 1.2476294
## ACF1 Theil's U
## Training set 0.1502759 NA
## Test set 0.1053316 1.31732
##
## $acc_stlf_with_BoxCox
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.42724 21.40466 14.30375 0.3185472 3.500221 0.3888560 0.1625098
## Test set 34.69481 40.13618 36.34649 7.7322091 8.132133 0.9881014 0.4746242
## Theil's U
## Training set NA
## Test set 1.04561
# All errors show that the best model is seasonal naive method.
autoplot(fc_bricksq$models$snaive) +
autolayer(fc_bricksq$test)
# In about an year the forecasts were similar to real data, but after that the real data increased exponentially while the trend of forecasts didn't change. But real data were still in the 80% prediction interval.
# dole data case
fc_dole <- forecast.models(dole, 3)
fc_dole$accuracies
## $acc_ets
## ME RMSE MAE MPE MAPE MASE
## Training set 98.00253 16130.58 9427.497 0.5518769 6.20867 0.2995409
## Test set 221647.42595 279668.65 221647.426 32.5447637 32.54476 7.0424271
## ACF1 Theil's U
## Training set 0.5139536 NA
## Test set 0.9394267 11.29943
##
## $acc_snaive
## ME RMSE MAE MPE MAPE MASE
## Training set 12606.58 56384.06 31473.16 3.350334 27.73651 1.000000
## Test set 160370.33 240830.92 186201.00 20.496197 27.38678 5.916184
## ACF1 Theil's U
## Training set 0.9781058 NA
## Test set 0.9208325 9.479785
##
## $acc_stlf
## ME RMSE MAE MPE MAPE MASE
## Training set -96.4811 7801.958 4530.06 -0.2719573 5.116149 0.1439341
## Test set 328005.9874 407547.190 328005.99 48.6435815 48.643581 10.4217690
## ACF1 Theil's U
## Training set 0.08037493 NA
## Test set 0.93373958 16.57033
##
## $acc_stlf_with_BoxCox
## ME RMSE MAE MPE MAPE MASE
## Training set 145.1468 6688.083 3659.404 0.1464869 3.82385 0.1162706
## Test set 205385.6547 268135.992 207317.238 29.3540384 29.85051 6.5871126
## ACF1 Theil's U
## Training set -0.09446256 NA
## Test set 0.93778748 10.68587
# All errors show that the best model is seasonal naive method.
# Overlay the held-out test data on the seasonal naive forecast plot.
dole_plot <- autoplot(fc_dole$models$snaive)
print(dole_plot + autolayer(fc_dole$test))
# The forecasts were completely wrong. Real data showed dramatic increase without fluctuation in the forecast horizons. But even the best model couldn't predict such change.
# a10 data case
# Fit the candidate models to the a10 series and show their accuracy measures.
fc_a10 <- forecast.models(a10, 3)
print(fc_a10$accuracies)
## $acc_ets
## ME RMSE MAE MPE MAPE MASE
## Training set 0.04378049 0.488989 0.3542285 0.2011915 3.993267 0.3611299
## Test set 1.62559331 2.562046 2.0101563 7.0011207 9.410027 2.0493197
## ACF1 Theil's U
## Training set -0.05238173 NA
## Test set 0.23062972 0.6929693
##
## $acc_snaive
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.9577207 1.177528 0.9808895 10.86283 11.15767 1.000000 0.3779746
## Test set 4.3181585 5.180738 4.3306974 19.80542 19.89352 4.415071 0.6384822
## Theil's U
## Training set NA
## Test set 1.383765
##
## $acc_stlf
## ME RMSE MAE MPE MAPE MASE
## Training set 0.06704027 0.6809938 0.4297557 0.3200743 4.732989 0.4381286
## Test set 1.83324369 3.1118106 2.4570607 7.0963340 11.223584 2.5049311
## ACF1 Theil's U
## Training set -0.01970509 NA
## Test set 0.23918322 0.8089697
##
## $acc_stlf_with_BoxCox
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01686093 0.4244591 0.3098511 0.003515845 3.594686 0.3158879
## Test set 1.25869582 2.2242431 1.8428354 5.032385108 8.701050 1.8787390
## ACF1 Theil's U
## Training set -0.1780966 NA
## Test set 0.1502463 0.5964321
# All errors show that the best model is STL + ETS(A, A, N) with BoxCox transformation model.
# Overlay the held-out test data on the Box-Cox STL + ETS forecast plot.
a10_plot <- autoplot(fc_a10$models$stlf_with_BoxCox)
print(a10_plot + autolayer(fc_a10$test))
# The forecasts were similar to the real data over about a year's horizon. For the rest of the forecast horizon, the real data's values were bigger. The best model could follow the general trend, but fell a little short of predicting the faster-increasing trend.
# h02 data case
# Fit the candidate models to the h02 series and show their accuracy measures.
fc_h02 <- forecast.models(h02, 3)
print(fc_h02$accuracies)
## $acc_ets
## ME RMSE MAE MPE MAPE MASE
## Training set 0.001532209 0.04653434 0.03463451 0.0008075938 4.575471 0.5850192
## Test set 0.023667588 0.07667744 0.06442193 1.7152319315 7.030603 1.0881653
## ACF1 Theil's U
## Training set 0.07982687 NA
## Test set -0.12074883 0.450176
##
## $acc_snaive
## ME RMSE MAE MPE MAPE MASE
## Training set 0.03880677 0.07122149 0.05920234 5.220203 8.156509 1.00000
## Test set -0.01479486 0.08551752 0.07161055 -1.308298 7.884556 1.20959
## ACF1 Theil's U
## Training set 0.40465289 NA
## Test set 0.02264221 0.5009677
##
## $acc_stlf
## ME RMSE MAE MPE MAPE MASE
## Training set 0.001987465 0.04609626 0.03396307 -0.1760405 4.751272 0.5736778
## Test set 0.046520958 0.09174622 0.07544541 3.5302072 8.001263 1.2743653
## ACF1 Theil's U
## Training set 0.01953927 NA
## Test set 0.05661406 0.5085785
##
## $acc_stlf_with_BoxCox
## ME RMSE MAE MPE MAPE MASE
## Training set 0.003628017 0.04230875 0.03068561 0.02682146 4.159581 0.5183175
## Test set 0.031706735 0.07574975 0.06381807 2.51742657 6.903027 1.0779653
## ACF1 Theil's U
## Training set -0.09600393 NA
## Test set -0.25297574 0.4385025
# All errors show that the best model is the STL + ETS(A, Ad, N) with BoxCox transformation method.
# Overlay the held-out test data on the Box-Cox STL + ETS forecast plot.
h02_plot <- autoplot(fc_h02$models$stlf_with_BoxCox)
print(h02_plot + autolayer(fc_h02$test))
# The forecasts were similar to real data for the most part.
# usmelec data case
# Fit the candidate models to the usmelec series and show their accuracy measures.
fc_usmelec <- forecast.models(usmelec, 3)
print(fc_usmelec$accuracies)
## $acc_ets
## ME RMSE MAE MPE MAPE MASE
## Training set -0.07834851 7.447167 5.601963 -0.1168709 2.163495 0.6225637
## Test set -10.20080652 15.291359 13.123441 -3.1908161 3.926338 1.4584491
## ACF1 Theil's U
## Training set 0.2719249 NA
## Test set 0.4679496 0.4859495
##
## $acc_snaive
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 4.921564 11.58553 8.998217 2.000667 3.511648 1.000000 0.4841860
## Test set 5.475972 17.20037 13.494750 1.391437 3.767514 1.499714 0.2692544
## Theil's U
## Training set NA
## Test set 0.4765145
##
## $acc_stlf
## ME RMSE MAE MPE MAPE MASE
## Training set -0.07064178 6.659696 4.907696 -0.07729202 1.910234 0.5454076
## Test set -11.41131201 17.776488 15.918880 -3.66455560 4.779167 1.7691150
## ACF1 Theil's U
## Training set 0.1126383 NA
## Test set 0.5099758 0.5639709
##
## $acc_stlf_with_BoxCox
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1351692 6.474779 4.761143 -0.05026415 1.848092 0.5291207
## Test set -19.9969503 24.406877 20.951753 -6.06977735 6.315723 2.3284338
## ACF1 Theil's U
## Training set 0.08255205 NA
## Test set 0.60877348 0.7777035
# Most of errors show that the best model is ETS(M, A, M) method.
# Overlay the held-out test data on the ETS forecast plot.
usmelec_plot <- autoplot(fc_usmelec$models$ets)
print(usmelec_plot + autolayer(fc_usmelec$test))
# Real data were within the prediction interval for the most part.
# a. Use ets() on the following series:
# bicoal, chicken, dole, usdeaths, lynx, ibmclose, eggs.
# Does it always give good forecasts?
fit_bicoal <- ets(bicoal)
autoplot(forecast(fit_bicoal))
# I think that ETS(M, N, N) is not good. It won't be useful for forecasting.
fit_chicken <- ets(chicken)
autoplot(forecast(fit_chicken))
# I think that ETS(M, N, N) is not good. The price almost went to near 0 and no one knows whether it will go up, stay flat, or fall further. The model didn't yield helpful forecasts.
fit_dole <- ets(dole)
autoplot(forecast(fit_dole))
# I think that ETS(M, A, M) is good. It reflected the increasing trend and existing small seasonality.
fit_usdeaths <- ets(usdeaths)
autoplot(forecast(fit_usdeaths))
# I think that ETS(A, N, A) is good. It reflected the existing seasonality.
fit_lynx <- ets(lynx)
autoplot(forecast(fit_lynx))
# I think that ETS(M, N, N) is good except for the exponentially widening prediction interval.
fit_ibmclose <- ets(ibmclose)
autoplot(forecast(fit_ibmclose))
# I think that ETS(A, N, N) is not good. It won't be helpful much for forecasting.
fit_eggs <- ets(eggs)
autoplot(forecast(fit_eggs))
# I think that ETS(M, N, N) is not good. There was a decreasing trend even if there were some dramatic increasing moments. I think that there should've been a decreasing or damped trend in the forecasts.
#b. Find an example where it does not work well. Can you figure out why?
# It looks like the ets function can't find a well-fitted ETS model when there are aperiodic fluctuations (or cycles) in the data. In such cases, the ets function couldn't find trend or seasonality and just yielded naive-like forecasts.
Show that the point forecasts from an ETS(M,A,M) model are the same as those obtained using Holt-Winters’ multiplicative method.
Show that the forecast variance for an ETS(A,N,N) model is given by \[ \sigma^{2}\left[1+\alpha^{2}(h-1)\right] \]
Write down \(95 \%\) prediction intervals for an ETS(A,N,N) model as a function of \(\ell_{T}, \alpha, h\) and \(\sigma\), assuming normally distributed errors.